Syllogism Validation with BERT

Python
Deep Learning
NLP

Given two premises, this model classifies whether they form a valid syllogism, reaching roughly 85% accuracy on a test set with a near 50/50 class split.

Author

Jake Gehri

Published

November 27, 2022

import pandas as pd
import torch
from torch import nn
import torch.nn.functional as F
import transformers
from transformers import DistilBertTokenizer
from transformers import DistilBertForSequenceClassification
from transformers import Trainer, TrainingArguments
from datasets import load_metric
import numpy as np
df = pd.read_csv('Avicenna_Train.csv', encoding='ISO-8859-1')
df.head()
Premise 1 Premise 2 Syllogistic relation Conclusion
0 unchecked imbalances in the society, will see ... correct these imbalances requires in-depth kno... no No conclusion
1 Chronic diseases are heart attacks and stroke,... In populations that eat a regular high-fiber d... yes In populations that eat a regular high-fiber d...
2 Formative assessment encourages children to en... An ideal learning environment uses formative a... yes An ideal learning environment encourages child...
3 Underrepresented female labor force in some pr... Job discrimination comes with underrepresented... yes Job discrimination comes with not being able t...
4 damaged mentality in an individual brings seri... Aggression harms the mentality of person. yes Aggression brings brings serious health proble...
df['label'] = df['Syllogistic relation'].eq('yes').mul(1)
df['text'] = (df['Premise 1'] + " : " + df['Premise 2'])
df['label'].value_counts()
1    2427
0    2373
Name: label, dtype: int64
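With 2427 positive and 2373 negative examples the classes are nearly balanced, so a majority-class baseline sits at about 50% accuracy. As a quick sanity check on the derived columns (using the text and label fields created above), one can print a single combined example:

# Row 0 has Syllogistic relation == "no", so its derived label should be 0.
print(df.loc[0, 'text'])
print(df.loc[0, 'label'])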
int(len(df) * 0.8)
3840
train_texts = df.iloc[:3840]['text'].values
train_labels = df.iloc[:3840]['label'].values

valid_texts = df.iloc[3840:]['text'].values
valid_labels = df.iloc[3840:]['label'].values
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True)
valid_encodings = tokenizer(list(valid_texts), truncation=True, padding=True)
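As an optional check that the premises were joined and padded as intended, the encodings can be inspected and decoded back to text; the DistilBERT tokenizer returns input_ids and attention_mask:

print(train_encodings.keys())                              # dict_keys(['input_ids', 'attention_mask'])
print(tokenizer.decode(train_encodings['input_ids'][0]))   # "[CLS] <premise 1> : <premise 2> [SEP] [PAD] ..."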
class SyllogismDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    
    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item
    
    def __len__(self):
        return len(self.labels)
train_dataset = SyllogismDataset(train_encodings, train_labels)
valid_dataset = SyllogismDataset(valid_encodings, valid_labels)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True)
valid_dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size=16, shuffle=True)
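A minimal check (not needed for training) that __getitem__ returns what the Trainer expects, i.e. a dict of tensors with a labels entry:

item = train_dataset[0]
print(item.keys())               # dict_keys(['input_ids', 'attention_mask', 'labels'])
print(item['input_ids'].shape)   # padded sequence length chosen by the tokenizer
print(item['labels'])            # tensor(0) or tensor(1)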
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(DEVICE)  # the Trainer moves the model itself, but being explicit lets us poke at it manually
model.train()
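Before handing everything to the Trainer (which builds its own dataloaders and handles device placement), a single untrained forward pass is a cheap sanity check that the batches and the classification head fit together; this is only a sketch using the dataloader defined above:

batch = next(iter(train_dataloader))
batch = {k: v.to(DEVICE) for k, v in batch.items()}
with torch.no_grad():
    out = model(**batch)
print(out.loss, out.logits.shape)   # scalar cross-entropy loss and a (16, 2) logits tensor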

metrics = load_metric('accuracy')
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    
    predictions = np.argmax(logits, axis=-1)
    return metrics.compute(predictions=predictions, references=labels)
training_args = TrainingArguments(output_dir='./results', num_train_epochs=3, per_device_train_batch_size=16,
                                 per_device_eval_batch_size=16, logging_dir='./logs', logging_steps=72)
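These arguments only log the training loss, which is why the table below shows no validation numbers during training. If you want compute_metrics run on the validation set each epoch, TrainingArguments accepts an evaluation strategy; a possible variant (not what was run here):

training_args_with_eval = TrainingArguments(output_dir='./results', num_train_epochs=3,
                                            per_device_train_batch_size=16, per_device_eval_batch_size=16,
                                            evaluation_strategy='epoch',   # evaluate after every epoch
                                            logging_dir='./logs', logging_steps=72)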

trainer = Trainer(model=model, 
                  args=training_args, 
                  train_dataset=train_dataset, 
                  eval_dataset=valid_dataset,
                  compute_metrics=compute_metrics
                 )
trainer.train()
/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
  warnings.warn(
***** Running training *****
  Num examples = 3840
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 720
[720/720 01:36, Epoch 3/3]
Step Training Loss
72 0.658500
144 0.492300
216 0.413400
288 0.298300
360 0.253200
432 0.216700
504 0.178600
576 0.106900
648 0.106800
720 0.091800

Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)

TrainOutput(global_step=720, training_loss=0.281636557314131, metrics={'train_runtime': 97.56, 'train_samples_per_second': 118.081, 'train_steps_per_second': 7.38, 'total_flos': 289110097566720.0, 'train_loss': 0.281636557314131, 'epoch': 3.0})
trainer.evaluate()
***** Running Evaluation *****
  Num examples = 960
  Batch size = 16
[60/60 02:07]
{'eval_loss': 0.4387502670288086,
 'eval_accuracy': 0.88125,
 'eval_runtime': 2.2301,
 'eval_samples_per_second': 430.476,
 'eval_steps_per_second': 26.905,
 'epoch': 3.0}
df_test = pd.read_csv('Avicenna_Test.csv', encoding='ISO-8859-1')

df_test['label'] = df_test['Syllogistic relation'].eq('yes').mul(1)
df_test['text'] = (df_test['Premise 1'] + " : " + df_test['Premise 2'])

test_texts = df_test['text'].values
test_labels = df_test['label'].values

test_encodings = tokenizer(list(test_texts), truncation=True, padding=True)

test_dataset = SyllogismDataset(test_encodings, test_labels)

test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=16, shuffle=True)
trainer.evaluate(test_dataset)
***** Running Evaluation *****
  Num examples = 1200
  Batch size = 16
{'eval_loss': 0.5759531855583191,
 'eval_accuracy': 0.8525,
 'eval_runtime': 2.8515,
 'eval_samples_per_second': 420.837,
 'eval_steps_per_second': 26.302,
 'epoch': 3.0}
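Accuracy alone does not show whether errors skew toward false "valid" or false "invalid" calls. A small per-class breakdown of the test predictions, sketched with the objects already defined above:

test_preds = np.argmax(trainer.predict(test_dataset).predictions, axis=-1)
print(pd.crosstab(test_labels, test_preds, rownames=['true'], colnames=['pred']))   # 2x2 confusion table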
sample_text = ['Socrates is a man : all men are mortal']
sample_label = [1]
sample_encoded = tokenizer(sample_text, truncation=True, padding=True)
sample_dataset = SyllogismDataset(sample_encoded, sample_label)
sample_dataset
<__main__.SyllogismDataset at 0x7f63a4fccd60>
trainer.predict(sample_dataset).label_ids
***** Running Prediction *****
  Num examples = 1
  Batch size = 16
array([1])
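One caveat: PredictionOutput.label_ids simply echoes the labels stored in the dataset, so the arrays printed here and in the next example reflect the supplied labels rather than the model's decision. To read the model's own prediction, take the argmax over .predictions; a minimal sketch:

pred = trainer.predict(sample_dataset)
probs = torch.softmax(torch.tensor(pred.predictions), dim=-1)
print(probs.argmax(dim=-1).item(), probs.max().item())   # predicted class and its softmax confidence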
sample_text_2 = ['If the streets are wet, it has rained recently : The streets are wet.']
sample_label_2 = [0]

sample_encoded_2 = tokenizer(sample_text_2, truncation=True, padding=True)

sample_dataset_2 = SyllogismDataset(sample_encoded_2, sample_label_2)

trainer.predict(sample_dataset_2).label_ids
***** Running Prediction *****
  Num examples = 1
  Batch size = 16
array([0])